Language/OS - Multiplatform Resource Library

home *** CD-ROM | disk | FTP | other *** search

/ Language/OS - Multiplatform Resource Library / LANGUAGE OS.iso / oper_sys / quartz / quartz10.lha / src / runtime / profile.c < prev next >

Wrap

C/C++ Source or Header | 1990-05-18 | 12KB | 558 lines

#include <stdio.h> #include "thread.h" #include "synch.h" #include <usclkc.h> #include "quartzcommon.h" #include "profile.h" #include "internal.h" #ifndef InitOverflowSize #define InitOverflowSize 5000 #endif #define InitNumObjs 10 #define InitNumKids 10 /* These get set by munge */ private int mNumProcIds = 0; private int mEndOfText = 0; /* communication from the computation -> sampling processors */ private int profileOn = FALSE; shared int effectiveParallelism = 0; shared int nominalParallelism = 0; shared int profileOver = FALSE; shared Processor processorList[NUMPROCS]; /* Sampled data -- output to a file */ shared FLOAT timeDiff = 0; shared int numSamples = 0; shared ConcurrentData *procData; shared GraphEntry *pcTable; /* needed by mcount */ shared int pcTableSize; /* Needed internally to control profiling */ static shared SpinLock *pcTableLocks; static shared GraphEntry overflowFirst[InitOverflowSize]; static shared GraphEntry *overflow; static shared SpinLock overflowLock; static shared int overflowSize = 0; static shared int overflowOccurred = 0; static shared SynchSamples *objData; static shared int objNum = 0; static shared SpinLock objLock; static shared ChildData *kidData; static shared int kidNum = 0; static shared SpinLock kidLock; private int iteration = 1; private unsigned int tmpStack[InitIdStackSize]; private int myHit; static shared int start = FALSE; static shared usclk_t startTime; static shared endCount; static shared SpinLock startLock; void OutOfRoom(); /* Initialization routines */ /* Init profiling data structures */ static void OverflowSetup (o) GraphEntry *o; { GraphTableInit(o, InitOverflowSize); overflow = o; overflowSize = InitOverflowSize; overflowOccurred = 0; } void ProcessorListInit () { int i; Thread *t; Processor *p; for (i = 0; i < NUMPROCS; i++) { p = &processorList[i]; t = &p->idleThread; t->type = ThreadType; t->idStack.base = &p->idStack[0]; if (i == 0) t->idStack.base->procID = StartID | BusyState; else t->idStack.base->procID = StartID | SpinState; t->idStack.top = t->idStack.base; t->idStack.limit = t->idStack.base + InitIdStackSize - 1; p->curThread = t; p->synchList = NULL; SLNPInit(&p->profLock); p->numSamples = 0; } } /* External entry point to initialize external data structures */ void ProfileInit (numProfilers) int numProfilers; { int i; if (mEndOfText == 0) { fprintf(stderr, "Unable to profile: munge not run on object\n"); exit(0); } pcTableSize = (mEndOfText + sizeof(GraphEntry)) / sizeof(GraphEntry); pcTable = MyShmalloc(GraphEntry, pcTableSize); GraphTableInit(pcTable, pcTableSize); pcTableLocks = MyShmalloc(SpinLock, pcTableSize); for (i = 0; i < pcTableSize; i++) SLNPInit(&pcTableLocks[i]); OverflowSetup(overflowFirst); SLNPInit(&overflowLock); procData = MyShmalloc(ConcurrentData, mNumProcIds); ConDataTableInit(procData, mNumProcIds); SLNPInit(&objLock); SLNPInit(&kidLock); endCount = numProfilers; SLNPInit(&startLock); effectiveParallelism = nominalParallelism = 1; } void ProfileSetAllBusy () { int i; for (i = 0; i < numProcessors; i++) { processorList[i].idleThread.idStack.top++; processorList[i].idleThread.idStack.top->procID = ForkID | BusyState; } effectiveParallelism = nominalParallelism = numProcessors; } void SetProfileOn () { profileOn = TRUE; } void SetProfileOff () { profileOn = FALSE; } /* Runtime profiling routines, for normal processors (eg, mcount) */ /* does the same thing as mcount, but ignore recursion */ void TPushOnIdStack (t, p, s) register Thread *t; register SynchProfile *p; unsigned int s; { register IdStackEntry *ePtr; unsigned int callerID; ASSERT(t->type == ThreadType); ASSERT(p->type == SynchProfileType); ePtr = t->idStack.top; ePtr->procID |= OverheadState; callerID = ePtr->procID & AllOffMask; if (p->g.callerID == callerID) AtomicIncrP(&(p->g.num)); else ProfileMustAdd((unsigned int)p, callerID, &p->g); if (ePtr >= t->idStack.limit) OutOfRoom(); /* die */ (ePtr + 1)->procID = NoID; ePtr->procID &= OverheadOffMask; t->idStack.top = ++ePtr; ePtr->procID = (s) | (int)p; } void CallAndReplaceOnIdStack (p, s) register SynchProfile *p; unsigned int s; { register Thread *t = pP.thread; register IdStackEntry *ePtr = t->idStack.top; unsigned int callerID; ASSERT(p->type == SynchProfileType); ePtr->procID |= OverheadState; callerID = ePtr->procID & AllOffMask; if (p->g.callerID == callerID) AtomicIncrP(&(p->g.num)); else ProfileMustAdd((unsigned int)p, callerID, &p->g); ReplaceOnIdStack(p,s); } /* nasty: how to make sure we're free from deadlock * on overflow, provided the procedures we call don't overflow * on interrupts */ void ProfileMustAdd (calleeID, callerID, p) unsigned int calleeID; unsigned int callerID; GraphEntry *p; { register GraphEntry *q; register SpinLock *l = NULL; register int old; for (q = p; q->calleeID != calleeID || q->callerID != callerID; q = q->next) { while (q->next == NULL) { if (!l) l = id2lock(calleeID); old = profileOn; profileOn = FALSE; /* in case we get an interrupt */ if (!SLNPTestAndGet(l)) { profileOn = old; continue; } if (q->next != NULL) { SLNPRelease(l); profileOn = old; break; } if (q->num != 0) /* have to get one from overflow */ { SLNPAcquire(&overflowLock); if (overflowSize == 0) OverflowSetup(MyShmalloc(GraphEntry,InitOverflowSize)); q->next = &overflow[--overflowSize]; SLNPRelease(&overflowLock); q = q->next; } q->num = 1; q->calleeID = calleeID; q->callerID = callerID; SLNPRelease(l); profileOn = old; return; } } AtomicIncrP(&(q->num)); } /* Runtime sampling routines */ /* return t1 - t2 */ static FLOAT ComputeDiff (t1, t2) usclk_t t1, t2; { usclk_t d; if (t1 < t2) d = t1 + (0xffffffff - t2); else d = t1 - t2; return((FLOAT)d); } static int Bound (n, lb, ub) int n, lb, ub; { if (n < lb) return(lb); if (n > ub) return(ub); return(n); } static int SampleStack (t, eff, nom) register Thread *t; int *eff, *nom; { int eP, nP; register IdStackEntry *e; register unsigned int *sp = tmpStack; /* Sampling begins */ eP = effectiveParallelism; nP = nominalParallelism; for (e = t->idStack.top; e >= t->idStack.base; e--, sp++) *sp = e->procID; /* Sampling ends */ *eff = Bound(eP, 1, MaxEffectiveParallelism) - 1; *nom = (nP < numProcessors) ? 0 : 1; iteration++; return(sp - tmpStack); } ChildData *GetChildData () { ChildData *k; SLNPAcquire(&kidLock); if (--kidNum < 0) { kidData = MyShmalloc(ChildData, InitNumKids); ChildTableInit(kidData, InitNumKids); kidNum = InitNumKids - 1; } k = &kidData[kidNum]; SLNPRelease(&kidLock); return(k); } SynchSamples *GetSampleSpace () { SynchSamples *s; SLNPAcquire(&objLock); if (--objNum < 0) { objData = MyShmalloc(SynchSamples, InitNumObjs); SynchTableInit(objData, InitNumObjs); objNum = InitNumObjs - 1; } s = &objData[objNum]; SLNPRelease(&objLock); return(s); } static void AddSample (data, diff, eP, nP, first, callee) ConcurrentData *data; FLOAT diff; int eP, nP, first; unsigned int callee; { ChildData *k; ASSERT(data->type == ConcurrentDataType && diff >= 0); ASSERT((eP >= 0 && eP < MaxEffectiveParallelism) && (nP == 0 || nP == 1)); if (data->hit[myHit] < iteration) { data->hit[myHit] = iteration; SLNPAcquire(&data->lock); data->nom.byNomP[MePlusKids][BUSY][nP] += diff; if (first) { data->busy.byEffP[eP] += diff; data->nom.byNomP[JustMe][BUSY][nP] += diff; } else /* mark where busy time came from */ { for (k = &data->kid; k->calleeID != callee; k = k->next) if (k->next == NULL) { if (k->calleeID != NoID) { k->next = GetChildData(); k = k->next; } k->calleeID = callee; break; } k->busy.byEffP[eP] += diff; } SLNPRelease(&data->lock); } } static void AddNomSample (data, diff, nP, state, first) ConcurrentData *data; FLOAT diff; int nP, state, first; { ASSERT(data->type == ConcurrentDataType && (nP == 0 || nP == 1)); ASSERT((state >= 0 || state < NumStates) && diff >= 0); if (data->hit[myHit] < iteration) { data->hit[myHit] = iteration; SLNPAcquire(&data->lock); if (first) data->nom.byNomP[JustMe][state][nP] += diff; data->nom.byNomP[MePlusKids][state][nP] += diff; SLNPRelease(&data->lock); } } static ConcurrentData *id2data (id) unsigned int id; { SynchProfile *p; id &= AllOffMask; if (isSynchID(id)) { p = (SynchProfile *)id; ASSERT(p->type == SynchProfileType); if (p->samples == NULL) p->samples = GetSampleSpace(); return(&p->samples->data); } return(&procData[id]); } static usclk_t ProfileProc (p) Processor *p; { int i, stackDepth, eP, nP; usclk_t next; FLOAT diff; next = GETUSCLK(); stackDepth = SampleStack(p->curThread, &eP, &nP); diff = ComputeDiff(next, p->lastSample); p->lastSample = next; if (stackDepth == 0 || isOverhead(tmpStack[0])) AddSample(&procData[NoID], diff, eP, nP, TRUE, NoID); else if (isSpinning(tmpStack[0])) for (i = 0; i < stackDepth; i++) AddNomSample(id2data(tmpStack[i]), diff, nP, SPIN, i == 0); else { AddSample(id2data(tmpStack[0]), diff, eP, nP, TRUE, NoID); for (i = stackDepth - 1; i > 0; i--) AddSample(id2data(tmpStack[i]), diff, eP, nP, FALSE, (unsigned int)(tmpStack[i-1] & AllOffMask)); } } static void ProfileSynch (p) SynchProfile *p; { int i, n[NumNumbers]; usclk_t next; FLOAT diff; register Thread *t; int stackDepth, eP, nP, type; if (p->status != ACTIVE) return; for (i = 0; i < NumNumbers; i++) n[i] = p->number[i]; next = GETUSCLK(); if (t = p->thread) stackDepth = SampleStack(t, &eP, &nP); diff = ComputeDiff(next, p->lastSample); if (t && stackDepth != 0 && !isOverhead(tmpStack[0]) && !isBusy(tmpStack[0]) && !isSpinning(tmpStack[0])) { if (isBlocked(tmpStack[0])) type = BLOCKED; else type = READY; for (i = 0; i < stackDepth; i++) AddNomSample(id2data(tmpStack[i]), diff, nP, type, i == 0); } if (!p->samples) p->samples = GetSampleSpace(); for (i = 0; i < NumNumbers; i++) { n[i] = Bound(n[i], 0, MaxNominalParallelism - 1); p->samples->queue.length[i][n[i]] += diff; } p->lastSample = next; } void ProfileExternal () { register int i; register Processor *p; register SynchProfile *s; SLNPAcquire(&startLock); if (!start) { start = TRUE; startTime = GETUSCLK(); for (i = 0; i < numProcessors; i++) processorList[i].lastSample = startTime; } SLNPRelease(&startLock); myHit = pP.myId - numProcessors; while (!profileOver) { for (i = 0; i < numProcessors && !profileOver; i++) { p = &processorList[i]; if (SLNPTestAndGet(&p->profLock)) { ProfileProc(p); for (s = p->synchList; s && !profileOver; s = s->next) ProfileSynch(s); p->numSamples++; SLNPRelease(&p->profLock); } } } SLNPAcquire(&startLock); if (--endCount == 0) /* wait for everybody to check in */ { #ifndef DEBUG KillAll(); #endif for (i = 0; i < numProcessors; i++) { timeDiff += ComputeDiff(processorList[i].lastSample, startTime); numSamples += processorList[i].numSamples; } timeDiff /= numProcessors; numSamples /= numProcessors; DumpInfo(); #ifdef DEBUG KillAll(); #endif } SLNPRelease(&startLock); exit(0); } void TooHigh () { printf("Fatal error: mcount() passed an out-of-bound processor ID.\n"); fflush(stdout); KillAll(); exit(1); } void OutOfRoom () { printf("Fatal error: profiler ID stack is out of room.\n"); fflush(stdout); KillAll(); exit(1); } void ProfileFinish () { profileOver = TRUE; }